# Prevent printing of warnings and such in the HTML
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

library(tidyverse)
library(multitaper)
library(GenomicAlignments)
library(parallel)

This is to be done using the multitaper package https://github.com/krahim/multitaper/.

Normalize the data

bam.locs <- dir("../../alignment/hisat2/output", pattern = ".bam", recursive = TRUE, full.names = TRUE)

names(bam.locs) <- bam.locs %>%
  str_split(pattern = "/") %>%
  map(function(x){x[[6]]}) %>%
  str_remove(".bam") %>%
  str_replace("AraR", "REL60")

Read the bams in, only keeping + strand reads and excluding ERCCs and tRNAs.

bam.list <- mclapply(bam.locs, function(x){
  b1 <- readGAlignments(x)
  
  b2 <- b1[strand(b1) == "+" & grepl("ECB_", seqnames(b1))]
  
  return(b2)
}, mc.cores = 40)

Calculate periodicity, this seems to want to return a graph no matter what.

df.list <- lapply(bam.list, function(x){
  # count number of reads at each end point
  t1 <- table(factor(end(x), levels = 15:150))
  
  # apply spec function
  t2 <- spec.pgram(as.ts(t1))
  
  # get a df of graphable parts, rescale the spec height to 0-1 so it's comparable across samples
  t3 <- tibble(period = 1/t2$freq,
               spec = t2$spec)

  # return said df
  return(t3)
})

Bind all those together

df1 <- bind_rows(df.list, .id = "sample") %>%
  separate(sample, into = c("rep", "seqtype", "line"), sep = "-") %>% 
  filter(is.finite(period) & period <= 4) # only retain a small range

df1$line <- gsub("M", "-", df1$line) %>%
  gsub("P", "+", .) %>% 
  gsub("AraR", "REL0", .)

# save it
write_csv(df1, "../../data_frames/three_nt_periodicity.csv")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] GenomicAlignments_1.28.0    Rsamtools_2.8.0            
##  [3] Biostrings_2.60.0           XVector_0.32.0             
##  [5] SummarizedExperiment_1.22.0 Biobase_2.52.0             
##  [7] MatrixGenerics_1.4.0        matrixStats_0.59.0         
##  [9] GenomicRanges_1.44.0        GenomeInfoDb_1.28.0        
## [11] IRanges_2.26.0              S4Vectors_0.30.0           
## [13] BiocGenerics_0.38.0         multitaper_1.0-15          
## [15] forcats_0.5.1               stringr_1.4.0              
## [17] dplyr_1.0.6                 purrr_0.3.4                
## [19] readr_1.4.0                 tidyr_1.1.3                
## [21] tibble_3.1.2                ggplot2_3.3.3              
## [23] tidyverse_1.3.1            
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.2             sass_0.4.0             jsonlite_1.7.2        
##  [4] modelr_0.1.8           bslib_0.2.5.1          assertthat_0.2.1      
##  [7] highr_0.9              GenomeInfoDbData_1.2.6 cellranger_1.1.0      
## [10] yaml_2.2.1             pillar_1.6.1           backports_1.2.1       
## [13] lattice_0.20-44        glue_1.4.2             digest_0.6.27         
## [16] rvest_1.0.0            colorspace_2.0-1       htmltools_0.5.1.1     
## [19] Matrix_1.3-4           pkgconfig_2.0.3        broom_0.7.6           
## [22] haven_2.4.1            zlibbioc_1.38.0        scales_1.1.1          
## [25] BiocParallel_1.26.0    generics_0.1.0         ellipsis_0.3.2        
## [28] withr_2.4.2            cli_2.5.0              magrittr_2.0.1        
## [31] crayon_1.4.1           readxl_1.3.1           evaluate_0.14         
## [34] fs_1.5.0               fansi_0.5.0            xml2_1.3.2.9001       
## [37] tools_4.1.0            hms_1.1.0              lifecycle_1.0.0       
## [40] munsell_0.5.0          reprex_2.0.0           DelayedArray_0.18.0   
## [43] compiler_4.1.0         jquerylib_0.1.4        rlang_0.4.11          
## [46] grid_4.1.0             RCurl_1.98-1.3         rstudioapi_0.13       
## [49] bitops_1.0-7           rmarkdown_2.8          gtable_0.3.0          
## [52] DBI_1.1.1              R6_2.5.0               lubridate_1.7.10      
## [55] knitr_1.33             utf8_1.2.1             stringi_1.6.2         
## [58] Rcpp_1.0.6             vctrs_0.3.8            dbplyr_2.1.1          
## [61] tidyselect_1.1.1       xfun_0.23